In [1]:
import pickle
import faiss


Loading faiss.

In [15]:
def load_data():
    with open('movies.pickle', 'rb') as f:
        data = pickle.load(f)
    return data

data = load_data()
data


Out[15]:
{'name': array(['Toy Story (1995)', 'GoldenEye (1995)', 'Four Rooms (1995)', ...,
        'Sliding Doors (1998)', 'You So Crazy (1994)',
        'Scream of Stone (Schrei aus Stein) (1991)'], dtype=object),
 'vector': array([[-0.01780608, -0.14265831,  0.10308606, ...,  0.09659795,
         -0.17529577, -0.03061521],
        [-0.03357764,  0.16418771,  0.21801303, ...,  0.16502103,
         -0.09166156,  0.05047869],
        [-0.2761452 , -0.01991325, -0.04969981, ...,  0.0258275 ,
         -0.08328608, -0.0152858 ],
        ...,
        [ 0.05142734, -0.01683608, -0.20441587, ...,  0.00045828,
          0.14679626,  0.2462584 ],
        [ 0.04491899, -0.02819411, -0.09472758, ..., -0.02152078,
          0.16223577,  0.19897607],
        [ 0.02531924,  0.03099714,  0.06437534, ..., -0.07260127,
          0.0467432 ,  0.07893164]], dtype=float32)}

In [16]:
class BruteForceIndex():
    def __init__(self, vectors, labels):
        self.dimention = vectors.shape[1]
        self.vectors = vectors.astype('float32')
        self.labels = labels


    def build(self):
        self.index = faiss.IndexFlatL2(self.dimention)
        self.index.train(self.vectors)
        self.index.add(self.vectors)
        
    def query(self, vectors, k=10):
        distances, indices = self.index.search(vectors, k) 
        return [self.labels[i] for i in indices[0]]

In [17]:
index = BruteForceIndex(data["vector"], data["name"])
index.build()

In [18]:
movie_vector, movie_name = data['vector'][90:91], data['name'][90]
simlar_movies_names = '\n* '.join(index.query(movie_vector))
print(f"The most similar movies to {movie_name} are:\n* {simlar_movies_names}")


The most similar movies to Nightmare Before Christmas, The (1993) are:
* Nightmare Before Christmas, The (1993)
* Heavy Metal (1981)
* Sirens (1994)
* Beauty and the Beast (1991)
* Akira (1988)
* Fantasia (1940)
* Benny & Joon (1993)
* Barbarella (1968)
* Pete's Dragon (1977)
* James and the Giant Peach (1996)

In [ ]: